library(forecast)
library(seasonal)
## 
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
## 
##     view
library(fpp2)
## Loading required package: fma
## 
## Attaching package: 'fma'
## The following object is masked from 'package:plyr':
## 
##     ozone
## Loading required package: expsmooth
library(broom)
library(tidyverse)
library(sweep)
library(tseries)
detach("package:dplyr", unload=TRUE)
## Warning: 'dplyr' namespace cannot be unloaded:
##   namespace 'dplyr' is imported by 'timetk', 'tigris', 'broom', 'tidycensus', 'sweep' so cannot be unloaded
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(GGally)
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
## The following object is masked from 'package:fma':
## 
##     pigs
library(e1071)
library(fpp2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
theme_set(theme_minimal())

6.1 Time Series Components

Additive decomposition model

\(y_t = S_t + T_t + R_t\)

Multiplicative decompisition model

$y_t = S_t T_t R_t $

\(y_t\) is the data, \(S_t\) is the seasonal component, \(T_t\) is the trend-cycle component, the \(R_t\) is the remainder component.

ATTENTION: The additive decomposition is the most appropriate if the magnitude of the seaosnal fluctuations or the variation around the trend-cycle does not vary of the times series.

Seasonally adjusted data

ATTENTION: if the variation due to seasonality is not of primary interest, the seasonally adjusted series can be useful.

6.2 Moving Averages

Moving average smoothing

\(\hat{T_t} = \frac{1}{m}\sum_{j = -k}^{k}y_{t+j}\)

where \(m = 2k+1\)

autoplot(elecsales) +
  labs(x = "Year", y = "Gwh",
       title = "Annual Electricity Sales: South Australia")

x <- data.frame(time = index(elecsales), 
                data = as.numeric(elecsales))

x <- x %>% 
  mutate(data_5ma = rollmean(data, k = 5, fill = NA))

knitr::kable(x, format = "html")
time data data_5ma
1989 2354.34 NA
1990 2379.71 NA
1991 2318.52 2381.530
1992 2468.99 2424.556
1993 2386.09 2463.758
1994 2569.47 2552.598
1995 2575.72 2627.700
1996 2762.72 2750.622
1997 2844.50 2858.348
1998 3000.70 3014.704
1999 3108.10 3077.300
2000 3357.50 3144.520
2001 3075.70 3188.700
2002 3180.60 3202.320
2003 3221.60 3216.940
2004 3176.20 3307.296
2005 3430.60 3398.754
2006 3527.48 3485.434
2007 3637.89 NA
2008 3655.00 NA

There are no values for either the first two ears of the last two year, because we do not have two observations on either side.

autoplot(elecsales, series = "Data") +
  autolayer(ma(elecsales, 5), series = "5-MA") +
  labs(x = "year", y = "GWH",
       title = "Annual electricity series: South Australia") 
## Warning: Removed 4 rows containing missing values (geom_path).

Moving avetages of moving averages

beer2 <- window(ausbeer, start = 1992)

beer2_data <- data.frame(date = index(beer2),
                         observation = as.numeric(beer2))

beer2_data %>%
  mutate(ma4 = ma(observation, order = 4, centre = F),
         ma2_4 = ma(observation, order = 4, centre = T)) %>%
  knitr::kable()
date observation ma4 ma2_4
1992.00 443 NA NA
1992.25 410 451.25 NA
1992.50 420 448.75 450.000
1992.75 532 451.50 450.125
1993.00 433 449.00 450.250
1993.25 421 444.00 446.500
1993.50 410 448.00 446.000
1993.75 512 438.00 443.000
1994.00 449 441.25 439.625
1994.25 381 446.00 443.625
1994.50 423 440.25 443.125
1994.75 531 447.00 443.625
1995.00 426 445.25 446.125
1995.25 408 442.50 443.875
1995.50 416 438.25 440.375
1995.75 520 435.75 437.000
1996.00 409 431.25 433.500
1996.25 398 428.00 429.625
1996.50 398 433.75 430.875
1996.75 507 433.75 433.750
1997.00 432 435.75 434.750
1997.25 398 440.50 438.125
1997.50 406 439.50 440.000
1997.75 526 439.25 439.375
1998.00 428 438.50 438.875
1998.25 397 436.25 437.375
1998.50 403 438.00 437.125
1998.75 517 434.50 436.250
1999.00 435 439.75 437.125
1999.25 383 440.75 440.250
1999.50 424 437.25 439.000
1999.75 521 442.00 439.625
2000.00 421 439.50 440.750
2000.25 402 434.25 436.875
2000.50 414 441.75 438.000
2000.75 500 436.25 439.000
2001.00 451 436.75 436.500
2001.25 380 434.75 435.750
2001.50 416 429.00 431.875
2001.75 492 436.00 432.500
2002.00 428 433.50 434.750
2002.25 408 437.00 435.250
2002.50 406 438.75 437.875
2002.75 506 431.75 435.250
2003.00 435 435.50 433.625
2003.25 380 431.50 433.500
2003.50 421 431.50 431.500
2003.75 490 434.00 432.750
2004.00 435 431.75 432.875
2004.25 390 422.75 427.250
2004.50 412 418.00 420.375
2004.75 454 421.25 419.625
2005.00 416 420.25 420.750
2005.25 403 427.25 423.750
2005.50 408 432.75 430.000
2005.75 482 428.50 430.625
2006.00 438 427.75 428.125
2006.25 386 430.00 428.875
2006.50 405 427.25 428.625
2006.75 491 426.50 426.875
2007.00 427 423.75 425.125
2007.25 383 419.25 421.500
2007.50 394 417.50 418.375
2007.75 473 419.25 418.375
2008.00 420 423.25 421.250
2008.25 390 427.00 425.125
2008.50 410 425.75 426.375
2008.75 488 427.75 426.750
2009.00 415 430.00 428.875
2009.25 398 430.00 430.000
2009.50 419 429.75 429.875
2009.75 488 423.75 426.750
2010.00 414 NA NA
2010.25 374 NA NA

Example: Electrical equipment manufacturing

autoplot(elecequip, series = "Data", color = "grey50", alpha = 0.5) +
  autolayer(ma(elecequip, centre = T, order = 12), series = "12-MA", color = "red") +
  labs(y = "New orders index",
       title = "Electrical equipment manufacturing (Euro area)")
## Warning: Removed 12 rows containing missing values (geom_path).

6.3 Classical decomposition

Additive decomposition and multiplcative decomposition

elecequip %>% decompose(type = "multiplicative") %>%
  autoplot() +
  labs(title = "Classical multiplicative decomposiition of electrical equipment index")

elecequip %>% decompose(type = "additive") %>%
  autoplot() +
  labs(title = "Classical additive decomposiition of electrical equipment index")

Comments on classical decompisition

ATTENTION:

  • the estimate of the trend-cycle is unavailable for the first and last few observations, and no estimate of the remainder component for the same time periods.

  • The trend-cycle estiamte tends to over-smooth rapid rises and falls in the data

  • Classical decomposition methods assume that the seasonal component repeat from year to year. in come place it may not hold true, like electricity demand pattern have changed over time as AC have become more widespread.

  • Occasionally, the values of the time series in a small number of periods may be particularly unusual. The classical method is not robust ot these kinds of unusual values.

6.4 X11 decomposition

elecequip %>%
  seas(x11 = "") -> fit

autoplot(fit) +
  labs("X11 decomposition of electrical equipment index")

ATTENTION: compare x11 and other classical time series decomposition method, we can see the X11 trend-cycle has captured the sudden fall in the data in searly 2009.

autoplot(elecequip, series = "Data") +
  autolayer(trendcycle(fit), series = "Trend") +
  autolayer(seasadj(fit), series = "Seasonally Adjusted") +
  labs(y = "new order index",
       title = "Electrical equipment manufacturing (Euro Area)") +
  scale_color_manual(values = c("grey", "blue", "red"),
                     breaks = c("Data", "Trend", "Seasonally Adjusted"))

fit %>% seasonal() %>%
  ggsubseriesplot() +
  labs(y = "Seasonal", x = "",
       title = "Seasonal sub-series plot of New order Index")

## 6.5 SEATS decomposition

SEATS stand for “Seasonal Extraction in ARIMA Time Series.” This procedure was originally developed at the Bank of Spain, and is now widely used by government agencies around the world. The procedure works only with quarterly and monthly data. so other seasonality data, such as daily, hourly data, or weekly data, require an alternative approach.

elecequip %>% 
  seas() -> fit_seats

fit_seats %>%
  autoplot() +
  labs(title = "SEATS decomposition of electrical equipment index")

6.6 STL decomposition

SYL acronymfor “Seasonal and Trend decomposition using Loess”

ATTENTION: Advantages over classical, X11, SEATS

  • can handle any type of seasinality, etc (Check book)
elecequip %>%
  stl(t.window = 13, s.window = "periodic", robust = T) %>%
  autoplot() +
  labs(title = "STL of new order index")

6.7 Measuring strength of trend and seasonality

strength of trend as

\(F_T = max(0, 1 - \frac{Var(R_t)}{Var(T_t + R_t)}\)

strendth of seasonality as

\(F_S = max(0, 1 - \frac{Var(R_t)}{Var(S_t + R_t)}\)

These method ive a measurement of strength of trend and seasonality, if series with seasonal or trend strength \(F_S\) \(F_T\) is close to 1, we can say the data exhibut strong seasonality/trend.

ATTENTION: These method could be useful if you want to analyze a large collection of time series, and want to find

6.8 Forecasting with decomposition

Example: Electrical equipment manufacturing

fit <- stl(elecequip, t.window = 13, s.window = "periodic", robust = T)

fit %>% 
  seasadj() %>%
  naive() %>%
  autoplot() +
  labs(y = "New orders index", x = "",
       title = "Naive forecasts of seasonally adjusted data")

ATTENTION: This is the naive forecasts of the seasonally adjusted electrical equipment orders data. We can “reseasonalised” by adding the seasonal naive forecasts of the seasonal component.

fit %>% forecast(method = "naive") %>%
  autoplot() +
  labs(y = "New orders index", title = "Forecasts from STL + Random Walk",
       x = "")

6.9 Exercises

  1. Show that a \(3 \times 5\) MA is equivalent to a 7-term weighted moving average with weights of 0.067, 0.133, 0.200, 0.200, 0.200, 0.133, and 0.067.

use beer2 as example

beer2_data %>%
  mutate(ma_5ma = ma(observation, order = 5, centre = F),
         ma_3_5ma = ma(ma_5ma, order = 3, centre = T))  %>%
  head(10) %>%
  knitr::kable()
date observation ma_5ma ma_3_5ma
1992.00 443 NA NA
1992.25 410 NA NA
1992.50 420 447.6 NA
1992.75 532 443.2 444.6667
1993.00 433 443.2 449.3333
1993.25 421 461.6 449.9333
1993.50 410 445.0 447.0667
1993.75 512 434.6 438.2000
1994.00 449 435.0 442.9333
1994.25 381 459.2 445.4000
  1. The plastics data set consists of the monthly sales (in thousands) of product A for a plastics manufacturer for five years.
  1. Plot the time series of sales of product A. Can you identify seasonal fluctuations and/or a trend-cycle?
autoplot(plastics)

we can see the seasonal fuctuations and upward trend.

  1. Use a classical multiplicative decomposition to calculate the trend-cycle and seasonal indices.
plastics %>% decompose(type = "multiplicative") ->
  fit_multi

fit_multi %>%
  autoplot()

  1. Do the results suppor the graphical interpretation from part a?

Yes, it does

  1. Compute and plot the seasonally adjusted data
seasadj(fit_multi)
##         Jan       Feb       Mar       Apr       May       Jun       Jul
## 1  967.3468  981.2262  999.3182  986.4758  985.8925  956.7826 1001.1759
## 2  966.0431  985.4495  996.7427 1023.8257 1051.9377 1057.0417 1108.5982
## 3 1168.1168 1116.3736 1139.6864 1158.9443 1152.4413 1146.0648 1119.7701
## 4 1239.8204 1212.1029 1207.9388 1218.2647 1219.4437 1229.0378 1277.0364
## 5 1342.8129 1452.8342 1450.0416 1411.6051 1405.1361 1414.8628 1384.4587
##         Aug       Sep       Oct       Nov       Dec
## 1  992.4139  981.0263  951.4241  978.9119  939.8819
## 2 1100.9592 1089.0366 1090.2260 1074.6860 1081.5244
## 3 1171.9625 1196.2349 1222.2981 1179.5334 1227.9683
## 4 1269.0820 1302.6210 1345.9580 1414.4319 1451.2352
## 5 1312.3369 1240.9008 1194.5377 1128.1178 1215.9647
autoplot(plastics, series = "Data", color = "grey50") +
  autolayer(seasadj(fit_multi), series = "Seasonally Adjusted", color = "red")

  1. Change one observation to be an outlier (e.g., add 500 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?
plastics_outlier <- plastics
plastics_outlier[40] <- plastics_outlier[40] + 500

plastics_outlier %>% decompose(type = "multiplicative") ->
  fit_out

autoplot(fit_out)

a <- autoplot(plastics_outlier, series = "Data", color = "grey50") +
  autolayer(seasadj(fit_out), series = "Seasonally Adjusted", color = "red")

a

b <- autoplot(plastics, series = "Data", color = "grey50") +
  autolayer(seasadj(fit_multi), series = "Seasonally Adjusted", color = "red")

grid.arrange(a,b, ncol = 1)

outlier have little effect on general trend, but have effect on seasonaly adjusted data, just like original data.

  1. Does it make any difference if the outlier is near the end rather than in the middle of the time series?

if the outlier is near the end, same amount of increase have less effect than in the middle of the time series. Because the trend is general upward, to have same amount of effect, the outlier shouold be larger in the end than middle.

  1. Recall your retail time series data (from Exercise 3 in Section 2.10). Decompose the series using X11. Does it reveal any outliers, or unusual features that you had not noticed previously?
retaildata <- readxl::read_excel("/Users/yifeiliu/Documents/R/data/book_exercise/forecasting/retail.xlsx", skip = 1)

myts <- ts(retaildata[,"A3349873A"],
  frequency=12, start=c(1982,4))

autoplot(myts)

myts %>% seas(x11 = "") -> fit

autoplot(fit)

classical_fit <- myts %>% decompose(type = "multiplicative")

autoplot(classical_fit)

put the classical decomposition with X11 side by side, we can observer X11 decomposite result should large outlier around 2010-2014. And X11 decomposite result also show seasonal effect start decrease around the end of dataset.

  1. Figures 6.16 and 6.17 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.
stl_labour <- stl(labour, s.window = 13, robust = TRUE)

autoplot(stl_labour) +
  labs(title = "STL decomposition of civilian labour force")

ggsubseriesplot(seasonal(stl_labour)) +
  labs(y = "seasonal", title = "")

  1. Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.
autoplot(labour, series = "Data", color = "grey50") +
  autolayer(trendcycle(stl_labour), series = "Trend", color = "red") +
  autolayer(seasadj(stl_labour), series = "Seasonally Adjusted", color = "blue") +
  labs(x = "", y = "", title = "Number of persons in the civilian labour force in Australia each month")

overall, the numer of people in the civilian labour force increase over time. There were some recession during 1991-1992, seasonally adjusted data were able to capture this event. From subseriesplot, we can observe the seasonal fluctuation is relative small.

  1. Is the recession of 1991/1992 visible in the estimated components?

It’s visible in seasonal adjusted data.

  1. This exercise uses the cangas data (monthly Canadian gas production in billions of cubic metres, January 1960 – February 2005).
  1. Plot the data using autoplot(), ggsubseriesplot() and ggseasonplot() to look at the effect of the changing seasonality over time. What do you think is causing it to change so much?
autoplot(cangas)

ggsubseriesplot(cangas)

ggseasonplot(cangas, polar = T)

In the early stage, the production of gas higher in winer than summer, due to cold weather. But the production of gas goes up during summer as time goes on, probably due to increase use of AC in the summer.

  1. Do an STL decomposition of the data. You will need to choose s.window to allow for the changing shape of the seasonal component.
fit_stl <- stl(cangas, s.window = 13, robust = T)

autoplot(fit_stl) +
  labs(title = "Monthly Canadian gas production",
       subtitle = "STL method", x= "")

we can observe the seasonal component increase at the begining of 70 and decrease around 90. And from trend component we can observe a general upward trend, but the trend is also flat during 70-87 period.

autoplot(cangas, series = "Data", color = "grey50") +
  autolayer(trendcycle(fit_stl), series = "Trend", color = "red") +
  autolayer(seasadj(fit_stl), series = "Seasonally Adjusted", color = "blue") +
  labs(x = "", y = "", title = "Monthly Canadian gas production, STL decomposition")

  1. ompare the results with those obtained using SEATS and X11. How are they different?
fit_seats <- cangas %>% seas()
fit_x11 <- cangas %>% seas(x11 = "")

autoplot(fit_x11)

autoplot(fit_seats)

seas method use multiplicative decompisition method, so the mean of remainder for both SEATS and X11 are one. and mean of remainder for STL is zero. The trend is same among all three method, both increase and remain the same then increase again.

For seasonal component, we can observe that the seas method have a high seasonal compoent then decrease and increase. For STL method the seasoanl compoent is increase in the middle of data and decrease again.

  1. We will use the bricksq data (Australian quarterly clay brick production. 1956–1994) for this exercise.
  1. Use an STL decomposition to calculate the trend-cycle and seasonal indices. (Experiment with having fixed or changing seasonality.)
# fixed seasonality 
fit_stl_fix <- bricksq %>% stl(s.window = "period", robust = T)
fit_stl_float <- bricksq %>% stl(s.window = 5, robust = T)

a <- autoplot(fit_stl_fix) + labs(title = "fixed", x = "")
b <- autoplot(fit_stl_float) + labs(title = "floated", x= "")

grid.arrange(a,b, ncol = 2)

seasonal component is constant in fixed STL method and change over time in float method.

  1. Compute and plot the seasonally adjusted data.
autoplot(bricksq, series = "Data", color = "grey50") +
  autolayer(seasadj(fit_stl_fix), series = "Seasonally Adjusted", color = "blue") +
  autolayer(seasadj(fit_stl_float), series = "Seasonally Adjusted", color = "red") +
  labs(x = "", y = "")

By plotting the seasonally adjusted data on the same chart, we can see the floating method is more smooth than the fixed method.

  1. Use a naïve method to produce forecasts of the seasonally adjusted data.
for_naive <- naive(bricksq)

autoplot(for_naive)

fit_stl_fix %>% forecast(method = "naive") -> for_fix


fit_stl_float %>% forecast(method = "naive") -> for_float

autoplot(bricksq) +
  autolayer(for_fix, series = "Fixed") +
  autolayer(for_float, series = "Floated")

The range of prediction interval from two method are almost identical.

  1. Use stlf() to reseasonalise the results, giving forecasts for the original data.
bricksq %>% stlf() -> for_stlf


autoplot(for_stlf) + labs(x = "", y = "", title = "stlf")

checkresiduals(for_stlf)
## Warning in checkresiduals(for_stlf): The fitted degrees of freedom is based
## on the model used for the seasonally adjusted data.

## 
##  Ljung-Box test
## 
## data:  Residuals from STL +  ETS(M,N,N)
## Q* = 41.128, df = 6, p-value = 2.733e-07
## 
## Model df: 2.   Total lags used: 8
Box.test(for_stlf$residuals, lag = 10, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  for_stlf$residuals
## X-squared = 42.86, df = 10, p-value = 5.267e-06
x <- Box.test(for_stlf$residuals, lag = 10, type = "Ljung-Box")

The residuals p-value is 5.267418510^{-6}